Contexte

Fonction de regroupement

# Spearman correlation between the expression profiles of two regulators.
#
# Args:
#   pair: a single string holding two row names of `normalized.count`
#     separated by a space (e.g. "AT1G01010 AT1G01020"). Only the first
#     space is treated as the separator.
#   normalized.count: matrix or data frame whose row names contain both
#     identifiers; the two rows are correlated across its columns.
#
# Returns:
#   A single numeric value: the Spearman correlation of the two rows.
get_cor <- function(pair, normalized.count) {
    # Split once, on the first space only
    sep <- regexpr(" ", pair, fixed = TRUE)
    if (sep < 1) {
        stop("`pair` must contain two identifiers separated by a space: ", 
            pair, call. = FALSE)
    }
    tf1 <- substr(pair, 1, sep - 1)
    tf2 <- substr(pair, sep + 1, nchar(pair))
    
    # A one-row data frame slice would be seen by cor() as a matrix with a
    # single observation per column and yield a matrix of NA; flatten each
    # row to a plain numeric vector so a scalar is always returned.
    profile1 <- as.numeric(unlist(normalized.count[tf1, ]))
    profile2 <- as.numeric(unlist(normalized.count[tf2, ]))
    
    cor(profile1, profile2, method = "spearman")
}


# Group highly correlated regulators into consensus "mean_" variables.
#
# Every pair of regulators whose Spearman correlation exceeds `corr_thr`
# becomes an edge of an undirected graph; Louvain communities of that graph
# are the groups. Each group is replaced in the expression table by one row
# holding the column-wise mean of its members. Regulators outside any group
# whose correlation to a group mean is below -corr_thr are attached to that
# group (name and removal from the table) without entering the mean.
#
# Args:
#   normalized.count: expression table indexed by row names; `regressors`
#     must all be row names. Presumably a numeric matrix (genes x samples)
#     -- TODO confirm against callers.
#   regressors: character vector of regulator identifiers (at least two).
#   corr_thr: correlation threshold; pairs above it are linked, ungrouped
#     regulators below its negative are attached to a group.
#   plot: currently unused; kept for interface compatibility.
#
# Returns:
#   A list with
#     counts: data frame of `normalized.count` with the grouped regulators
#       removed and one "mean_<tf1>-<tf2>-..." row added per group;
#     membership: community membership of the positively correlated
#       regulators (negatively correlated additions are not included);
#     new_regressors: consensus row names followed by the regulators that
#       were not grouped.
group_correlated_TFs <- function(normalized.count, regressors, corr_thr = 0.9, plot = FALSE) {
    
    # Spearman correlation for every unordered pair of regulators
    pairs <- data.frame(t(combn(regressors, 2)))
    pairs$cor <- vapply(paste(pairs[, 1], pairs[, 2]), get_cor, numeric(1), 
        normalized.count = normalized.count)
    # which() drops pairs whose correlation is NA (e.g. constant profiles);
    # plain logical indexing would insert all-NA rows into the edge list
    top <- pairs[which(pairs$cor > corr_thr), ]
    
    
    # graph and community detection of highly correlated TFs
    net_un <- graph_from_data_frame(top, directed = FALSE)
    louvain <- cluster_louvain(net_un)
    groups <- membership(louvain)
    
    other_tfs <- regressors[!regressors %in% names(groups)]
    
    # Building the new consensus variables. Rows are filled by iteration
    # position, not by community id: ids are not guaranteed to appear in
    # increasing order, and the row names assigned below follow iteration
    # order, so indexing by id would attach names to the wrong rows.
    group_ids <- unique(groups)
    n_groups <- length(group_ids)
    new_reg <- character(n_groups)
    grouped_regs <- data.frame(matrix(nrow = n_groups, ncol = ncol(normalized.count)))
    colnames(grouped_regs) <- colnames(normalized.count)
    
    grouped_tfs <- c()
    
    for (i in seq_along(group_ids)) {
        group <- group_ids[i]
        tfs <- names(groups)[groups == group]
        mean_tf <- colMeans(normalized.count[tfs, ])
        grouped_regs[i, ] <- mean_tf
        
        # find the regulators negatively correlated to that group, and add
        # them, without using their expression in the mean group expression
        for (tf in other_tfs) {
            rho <- cor(normalized.count[tf, ], mean_tf, method = "spearman")
            # skip undefined correlations instead of failing in the condition
            if (!is.na(rho) && rho < -corr_thr) {
                print(paste("adding tf ", tf, " to group ", group, "because correlation of", 
                  rho, "to mean"))
                tfs <- c(tfs, tf)
                # remove this tf from the list so it is not assigned to another group later
                other_tfs <- other_tfs[other_tfs != tf]
                print(length(other_tfs))
            }
        }
        
        new_reg[i] <- paste0("mean_", paste(tfs, collapse = "-"))
        grouped_tfs <- c(grouped_tfs, tfs)
    }
    rownames(grouped_regs) <- new_reg
    normalized.count <- rbind.data.frame(normalized.count, grouped_regs)
    
    # remove regressors that are grouped from the data
    normalized.count <- normalized.count[!rownames(normalized.count) %in% grouped_tfs, 
        ]
    list(counts = normalized.count, membership = groups, new_regressors = c(new_reg, 
        regressors[!regressors %in% grouped_tfs]))
}



# Project helper functions (path kept exactly as written in the script)
source("Funtions/Network_functions.R")

# Load the saved R objects used below; presumably these files provide
# DEGs, TF and normalized.count -- verify against the .RData contents
load("./Data/DEGsListsFiltered.RData")
load("./Data/PlnTFBDRegulatorsList.RData")
load("./Data/normalized.count_At.RData")
load("./Data/OntologyAllGenes.RData")


# Gene list stored under the "cnF CnF" entry of DEGs, and its size
genes <- DEGs[["cnF CnF"]]
print(length(genes))
[1] 1309
# expression data
# Keep only the columns whose name contains "F"
normalized.count <- normalized.count[, grepl("F", colnames(normalized.count))]

# TFs: regulators (TF$AGI) that are also part of the gene list
regressors <- intersect(TF$AGI, genes)

# Group the correlated TFs into consensus variables
data <- group_correlated_TFs(normalized.count, regressors)
[1] "adding tf  AT4G04890  to group  2 because correlation of -0.951048951048951 to mean"
[1] 55
# Consensus ("mean_...") variables followed by the ungrouped regulators
print(data$new_regressors)
 [1] "mean_AT5G23405-AT5G02810-AT3G57300-AT3G19860-AT3G57230-AT4G22950-AT5G59780-AT1G71030-AT1G10610-AT2G29060-AT1G72650-AT5G39610-AT5G13080"
 [2] "mean_AT2G46790-AT5G15830-AT4G27410"                                                                                                    
 [3] "mean_AT3G48360-AT5G67190-AT1G43700-AT1G13300-AT1G68670-AT3G55770-AT1G21000-AT3G11100-AT5G63160-AT2G18280"                              
 [4] "mean_AT1G13260-AT4G17490-AT4G17500-AT5G07580-AT5G47220-AT5G51190-AT5G61600-AT5G60850-AT1G27730-AT1G25550-AT3G50060-AT4G37260"          
 [5] "mean_AT1G46768-AT3G15210-AT5G47230-AT2G01930-AT5G04340-AT2G45660-AT5G02840-AT5G51860-AT4G27310-AT3G51910-AT3G06380-AT4G04890"          
 [6] "mean_AT1G72360-AT3G16770-AT3G61630"                                                                                                    
 [7] "mean_AT1G78080-AT2G19810"                                                                                                              
 [8] "mean_AT2G47520-AT3G02550"                                                                                                              
 [9] "mean_AT5G61590-AT2G25900-AT3G15500-AT1G76590-AT3G10030"                                                                                
[10] "mean_AT3G19580-AT2G14210-AT3G30260-AT5G51870-AT3G58710-AT1G58100-AT3G47620"                                                            
[11] "mean_AT3G60580-AT1G04990-AT3G02830-AT1G34670-AT3G02940-AT5G16770-AT5G65230"                                                            
[12] "mean_AT1G57560-AT4G38620"                                                                                                              
[13] "mean_AT1G73410-AT1G64000"                                                                                                              
[14] "AT1G05830"                                                                                                                             
[15] "AT5G60100"                                                                                                                             
[16] "AT3G04380"                                                                                                                             
[17] "AT4G02020"                                                                                                                             
[18] "AT1G26680"                                                                                                                             
[19] "AT1G06160"                                                                                                                             
[20] "AT1G22190"                                                                                                                             
[21] "AT1G25560"                                                                                                                             
[22] "AT1G28370"                                                                                                                             
[23] "AT1G68840"                                                                                                                             
[24] "AT2G20880"                                                                                                                             
[25] "AT2G31230"                                                                                                                             
[26] "AT2G41710"                                                                                                                             
[27] "AT2G46310"                                                                                                                             
[28] "AT3G25730"                                                                                                                             
[29] "AT4G25470"                                                                                                                             
[30] "AT4G25490"                                                                                                                             
[31] "AT5G05410"                                                                                                                             
[32] "AT5G07310"                                                                                                                             
[33] "AT5G11590"                                                                                                                             
[34] "AT2G20180"                                                                                                                             
[35] "AT2G28160"                                                                                                                             
[36] "AT3G61420"                                                                                                                             
[37] "AT3G49760"                                                                                                                             
[38] "AT4G36730"                                                                                                                             
[39] "AT1G26590"                                                                                                                             
[40] "AT3G46090"                                                                                                                             
[41] "AT5G59820"                                                                                                                             
[42] "AT1G29600"                                                                                                                             
[43] "AT4G29190"                                                                                                                             
[44] "AT5G42820"                                                                                                                             
[45] "AT1G72830"                                                                                                                             
[46] "AT4G24150"                                                                                                                             
[47] "AT3G03660"                                                                                                                             
[48] "AT4G11880"                                                                                                                             
[49] "AT2G47460"                                                                                                                             
[50] "AT3G24310"                                                                                                                             
[51] "AT5G06100"                                                                                                                             
[52] "AT5G58900"                                                                                                                             
[53] "AT1G18330"                                                                                                                             
[54] "AT1G32510"                                                                                                                             
[55] "AT1G69490"                                                                                                                             
[56] "AT1G77450"                                                                                                                             
[57] "AT2G43000"                                                                                                                             
[58] "AT4G17980"                                                                                                                             
[59] "AT4G28530"                                                                                                                             
[60] "AT5G46590"                                                                                                                             
[61] "AT4G35270"                                                                                                                             
[62] "AT1G64860"                                                                                                                             
[63] "AT5G05550"                                                                                                                             
[64] "AT1G62300"                                                                                                                             
[65] "AT2G21900"                                                                                                                             
[66] "AT3G01970"                                                                                                                             
[67] "AT5G24110"                                                                                                                             
[68] "AT5G41570"                                                                                                                             
# Community membership of the grouped TFs, and the names of the consensus
# regressors (those containing "mean")
membership <- data[["membership"]]
groups <- grep("mean", data[["new_regressors"]], value = TRUE)

Profils d’expression des communautés corrélées positivement

library(DIANE)
# For every community, draw the profiles of its members twice: once with
# draw_profiles' default `expression` setting and once with
# expression = "counts". `conds` is the column names split in two at the
# first "_".
for (group in unique(membership)) {
    print(draw_profiles(data = normalized.count, membership = membership, k = group, 
        conds = str_split_fixed(colnames(normalized.count), "_", 2)))
    
    print(draw_profiles(data = normalized.count, membership = membership, k = group, 
        expression = "counts", conds = str_split_fixed(colnames(normalized.count), 
            "_", 2)))
}

Valeurs d’importance en retirant tous les TFs d’un groupe

On infère GENIE3 sur les données classiques, sans espionnes ni variables résumées. On s’intéresse au groupe 13, par exemple.

On crée un graphe avec les 3000 premières arêtes, et on regarde pour le tf1, ses cibles, ainsi que l’union des cibles de tous les autres. On crée aussi une catégorie qui est celle des cibles communes entre le tf1 et les autres.

# Reload the expression data from disk
load("./Data/normalized.count_At.RData")

# expression data: keep only the columns whose name contains "F"
normalized.count <- normalized.count[, grepl("F", colnames(normalized.count))]
# GENIE3 importance matrix with the individual regulators (no consensus
# variables) as regulators and the gene list as targets
mat <- GENIE3(normalized.count, regulators = regressors, targets = genes, nCores = 5)

# sanity check in passing: the importances for each gene (column) sum to 1
colSums(mat)
AT1G01020 AT1G01090 AT1G01160 AT1G01170 AT1G01470 AT1G01600 AT1G01610 AT1G01770 
        1         1         1         1         1         1         1         1 
AT1G01820 AT1G02300 AT1G02610 AT1G02640 AT1G02730 AT1G02840 AT1G02860 AT1G03060 
        1         1         1         1         1         1         1         1 
AT1G03090 AT1G03457 AT1G03610 AT1G03700 AT1G03850 AT1G03935 AT1G04120 AT1G04220 
        1         1         1         1         1         1         1         1 
AT1G04360 AT1G04990 AT1G05000 AT1G05575 AT1G05700 AT1G05830 AT1G05840 AT1G05890 
        1         1         1         1         1         1         1         1 
AT1G06000 AT1G06160 AT1G06570 AT1G06630 AT1G07135 AT1G08090 AT1G08230 AT1G08630 
        1         1         1         1         1         1         1         1 
AT1G08980 AT1G09070 AT1G09140 AT1G09250 AT1G09280 AT1G09780 AT1G09932 AT1G10070 
        1         1         1         1         1         1         1         1 
AT1G10140 AT1G10220 AT1G10610 AT1G10910 AT1G10920 AT1G11175 AT1G11180 AT1G11185 
        1         1         1         1         1         1         1         1 
AT1G11260 AT1G11530 AT1G11790 AT1G11920 AT1G12010 AT1G12040 AT1G12110 AT1G12440 
        1         1         1         1         1         1         1         1 
AT1G12780 AT1G12805 AT1G13080 AT1G13250 AT1G13260 AT1G13300 AT1G13710 AT1G14200 
        1         1         1         1         1         1         1         1 
AT1G14540 AT1G14550 AT1G14640 
        1         1         1 
 [ reached getOption("max.print") -- omitted 1234 entries ]
# Link list limited to 3000 entries (reportMax), i.e. the first 3000 edges
links <- getLinkList(mat, reportMax = 3000)

# Directed graph built from that link list
net <- graph_from_data_frame(links, directed = TRUE)


# Compare the importances of the first TF of a community ("tf1") for its own
# targets and for the targets of the other members of that community.
#
# Relies on the global objects `membership`, `normalized.count`, `net`,
# `mat`, `regressors` and `genes`.
#
# Steps: print the community members and their count profiles, collect the
# outgoing neighbours in `net` of tf1 and of the other members, print the
# number of targets of the others and of the targets shared with tf1, plot
# tf1's importances from `mat` per target category, then re-run GENIE3
# without the other members and plot tf1's importances again.
#
# Args:
#   group: community identifier, as found in `membership`.
#
# Returns:
#   Invisibly, the last plot (the value of the final print()).
see_importances <- function(group) {
    community_tfs <- names(membership[membership == group])
    print(community_tfs)
    
    split_conds <- str_split_fixed(colnames(normalized.count), "_", 2)
    print(draw_profiles(data = normalized.count, membership = membership, k = group, 
        expression = "counts", conds = split_conds))
    
    # tf1 is the first member; the rest of the community are the "others"
    tf <- community_tfs[1]
    others <- community_tfs[community_tfs != tf]
    
    # names of the outgoing neighbours of a regulator in the network
    out_targets <- function(regulator) {
        neighbors(net, regulator, mode = "out")$name
    }
    
    tf_targets <- as.vector(out_targets(tf))
    others_targets <- unique(unlist(lapply(others, out_targets)))
    print(length(others_targets))
    
    common <- intersect(tf_targets, others_targets)
    print(length(common))
    
    # one row per target, labelled by who regulates it in the network
    targets <- unique(c(tf_targets, others_targets))
    target_type <- ifelse(targets %in% common, "common", ifelse(targets %in% 
        tf_targets, "tf1", "others"))
    importances <- data.frame(cibles = targets, type = target_type)
    importances$importance_all_community <- mat[tf, match(importances$cibles, 
        colnames(mat))]
    
    plot_all <- ggplot(importances, aes(x = type, y = importance_all_community, 
        fill = type)) + 
        geom_violin(alpha = 0.7) + 
        geom_jitter() + 
        ggtitle("Importance de tf1 pour les cibles de tf1 ou des autres TFs de sa communauté")
    print(plot_all)
    
    # same inference, with the other members of the community removed from
    # the regulators
    remaining_regulators <- regressors[!regressors %in% others]
    mat_tf1_alone <- GENIE3(normalized.count, regulators = remaining_regulators, 
        targets = genes, nCores = 5)
    importances$importance_tf1_alone <- mat_tf1_alone[tf, match(importances$cibles, 
        colnames(mat_tf1_alone))]
    
    plot_alone <- ggplot(importances, aes(x = type, y = importance_tf1_alone, 
        fill = type)) + 
        geom_violin(alpha = 0.7) + 
        geom_jitter() + 
        ggtitle("Importance de tf1 pour les cibles de tf1 ou des autres TFs de sa communauté, sans les autres TFs de sa communauté")
    print(plot_alone)
}

# Run the importance comparison for every community
for (group in unique(membership)) {
    see_importances(group)
}
 [1] "AT5G23405" "AT5G02810" "AT3G57300" "AT3G19860" "AT3G57230" "AT4G22950"
 [7] "AT5G59780" "AT1G71030" "AT1G10610" "AT2G29060" "AT1G72650" "AT5G39610"
[13] "AT5G13080"

[1] 160
[1] 32

[1] "AT2G46790" "AT5G15830" "AT4G27410"

[1] 9
[1] 1

 [1] "AT3G48360" "AT5G67190" "AT1G43700" "AT1G13300" "AT1G68670" "AT3G55770"
 [7] "AT1G21000" "AT3G11100" "AT5G63160" "AT2G18280"

[1] 50
[1] 7

 [1] "AT1G13260" "AT4G17490" "AT4G17500" "AT5G07580" "AT5G47220" "AT5G51190"
 [7] "AT5G61600" "AT5G60850" "AT1G27730" "AT1G25550" "AT3G50060" "AT4G37260"

[1] 69
[1] 3

 [1] "AT1G46768" "AT3G15210" "AT5G47230" "AT2G01930" "AT5G04340" "AT2G45660"
 [7] "AT5G02840" "AT5G51860" "AT4G27310" "AT3G51910" "AT3G06380"

[1] 52
[1] 1

[1] "AT1G72360" "AT3G16770" "AT3G61630"

[1] 13
[1] 5

[1] "AT1G78080" "AT2G19810"

[1] 8
[1] 6

[1] "AT2G47520" "AT3G02550"

[1] 15
[1] 7

[1] "AT5G61590" "AT2G25900" "AT3G15500" "AT1G76590" "AT3G10030"

[1] 68
[1] 8

[1] "AT3G19580" "AT2G14210" "AT3G30260" "AT5G51870" "AT3G58710" "AT1G58100"
[7] "AT3G47620"

[1] 154
[1] 85

[1] "AT3G60580" "AT1G04990" "AT3G02830" "AT1G34670" "AT3G02940" "AT5G16770"
[7] "AT5G65230"

[1] 193
[1] 19

[1] "AT1G57560" "AT4G38620"

[1] 51
[1] 23

[1] "AT1G73410" "AT1G64000"

[1] 40
[1] 7

On peut imaginer que dans un nouveau graphe, tf1 volerait les cibles correspondant à la pointe de la partie verte, dépassant le seuil.